Antibiotics gene linkage graph

Load generic libraries

source('configuration.r')
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths

Load plot specific libraries

library(foreach)
library(readr)
suppressMessages(library(igraph))
library(ggraph)
library(ggalluvial)

Create graph edge data

dat <- read.table('../tables/antibiotics_gene_linkage.tsv', head=FALSE, stringsAsFactors = FALSE) %>% 
  mutate(id=paste0(V1,':',V2, ':', V3)) %>% 
  mutate(V6=str_replace(V6, 'PheCmlA5', 'Phe'))

edges.full <- foreach(id=unique(dat$id), .combine=rbind) %do% {
    tmp <- dat[dat$id == id, ]
    if(nrow(tmp) < 2) {
        NULL
    }else{
        edge <- data.frame(t(combn(sort(tmp$V6), 2)), id=id)
        edge$dist <- foreach(r=1:nrow(edge), .combine = c) %do% {
          abs(rowMeans(tmp[tmp$V6 == edge[r, ]$X1, 4:5]) - rowMeans(tmp[tmp$V6 == edge[r, ]$X2, 4:5]))
        }
        edge
    }
}
## save for ad hoc analysis
write.table(edges.full, '../output_tables/ar_gene_linkage_edge_list_uncollapsed.tsv', row.names = F, quote=F, sep='\t')

Function for ploting

get_graph_data <- function(pattern='', reverse=FALSE){
  if(reverse){
    edges.collapsed <- filter(edges.full, !str_detect(id, pattern))
  }else{
    edges.collapsed <- filter(edges.full, str_detect(id, pattern)) 
  }
    
  ## filtering and collapse into edge weights
   edges.collapsed <- group_by(edges.collapsed, X1, X2) %>% 
    tally() %>% 
    filter(n>2)   ### keep edges with more than 2 support
  
  ## Run MCL graph clustering
  write.table(edges.collapsed, 'tmp_edges.tsv', quote=F, sep='\t', col.names = F, row.names = F)
  system('/mnt/software/bin/mcl tmp_edges.tsv  --abc -o tmp_mcl.tsv -overlap keep -I 3 2>/dev/null')
  mcl <- read_tsv('tmp_mcl.tsv', col_names = F)
  grp <- foreach(c=1:nrow(mcl), .combine = 'rbind') %do%{
    tmp <- as.character(na.exclude(as.character(mcl[c,])))
    data.frame(vertex=tmp, grp=c, stringsAsFactors = F)
  } %>% data.frame(row.names = 1)
  
  ## generate graph data
  g <- graph_from_data_frame(edges.collapsed, directed=FALSE)
  ## vertices
  colors <- pal_simpsons('springfield')(16)
  n.colors <- colors[as.numeric(as.factor(str_replace(V(g)$name, '.*_', '')))]
  V(g)$color <- n.colors
  V(g)$community <- grp[V(g)$name,]##cluster_walktrap(g)$membership
  V(g)$Type <- str_replace(V(g)$name, '.*_', '')
  ## Edges
  E(g)$width <- edges.collapsed$n
  E(g)$community <-
    apply(as.data.frame(get.edgelist(g, names=FALSE)), 1,
                      function(x)
                        ifelse(V(g)$community[x[1]] == V(g)$community[x[2]],
                                         LETTERS[V(g)$community[x[1]]], NA))
  ## format vertex names
  aux <- function(x){
      if(length(x) > 2){
          paste0(x[1],'_',x[2])
      }else{
          x[1]
      }
  }
  V(g)$name <- sapply(str_split(V(g)$name, '_'), aux)
  #### carbapenemase
  args <- read.table('../metadata/Bla_Carb.dat', head=F, stringsAsFactors=F)[,1]
  carb <- setNames(rep('No', length(V(g)$name)),V(g)$name)
  carb[names(carb) %in% args] <- 'Yes'
  V(g)$carb <- carb
  
  g
}

Main figure of all genes

g <- get_graph_data()
g_cols <- c('burlywood', pal_npg('nrc')(10)[2:7], 'purple',  'blue', 'darkgreen', 'pink', 'red', 'gold', 'black')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/fig3a.ar_gene_linkage_graph.pdf', p, width=16, height=16)

Sankey plot

Format data

edge.dat <- data.frame(v=V(g)$name, g=LETTERS[V(g)$community])

genome.info <- read.table('../tables/genome_info.dat', sep = '\t') 
merge(dat, genome.info, by=c(1,2)) %>% ## only take the high/medium quality ones
  select(Species=V1, gene=V6.x) %>% mutate(gene=str_replace(gene, '_.*','')) %>%
  filter(Species!='plasmid') -> sp.dat

sankey.dat <- merge(sp.dat, edge.dat, by.x=2, by.y=1) %>% 
  mutate(Species=str_replace(Species,'_',' ')) %>% 
  group_by(Species, g) %>% 
  count %>% 
  mutate(Antibiotics_cluster=as.character(g)) %>% 
  filter(n>2)

Plot

p <- ggplot(sankey.dat,aes(y=log10(n), axis1=Species, axis2=Antibiotics_cluster)) +
  geom_alluvium(aes(fill=Antibiotics_cluster), width=1/5)  +
  geom_stratum(width=1/5, fill=NA) +
  geom_text(stat = "stratum", label.strata = TRUE, size=6, fontface = "italic", nudge_x=-0.12, hjust=1) +
  scale_fill_manual(values=g_cols) +
  scale_x_discrete(limits = c("Species", "Antibiotics Cluster"), expand = c(.5, .05)) +
  theme_void() +
  theme()
p

ggsave('../plots/fig3b.ar_gene_linkage_sankey.pdf', p, width=18, height=18)

Supplementary figures

Staphylococcus aureus network

g <- get_graph_data('Staphylococcus_aureus')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup11.staph_aureus_ar_gene_linkage_graph.pdf', p, width=5, height=5)

Genome network

g <- get_graph_data('plasmid', reverse = TRUE)
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup12.genome_ar_gene_linkage_graph.pdf', p, width=16, height=16)

Plasmid network

g <- get_graph_data('plasmid')
p <- ggraph(g, layout = 'fr')+#, circular=TRUE) +
  geom_edge_arc(aes(width=width, color=community),
                curvature = 0.0,
                alpha=0.7,
                end_cap=circle(1.5, 'mm'), start_cap=circle(1.5, 'mm'))   +
  scale_edge_color_manual(values=g_cols, 
                          na.value = rgb(0.8,0.8,0.8,0.5)) +
  geom_node_point(aes(fill=Type, color=carb),size= as.numeric(as.factor(V(g)$carb) )*5, shape=21) +
  scale_color_manual(values=c('grey','firebrick')) + 
  geom_node_text(aes(label = name), size=5, repel = TRUE, fontface='bold') +
  scale_edge_width(range = c(0.5,3)) +
  scale_fill_manual(values=pal_ucscgb('default')(16)[-c(1,3,4)]) +
  theme_void()
p

ggsave('../plots/sup12.plasmid_ar_gene_linkage_graph.pdf', p, width=10, height=7)

Session informaton

sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2   ggalluvial_0.9.1 ggraph_1.0.2     igraph_1.2.2    
##  [5] readr_1.1.1      foreach_1.4.4    ggsci_2.9        reshape2_1.4.3  
##  [9] stringr_1.3.0    tibble_1.4.2     tidyr_0.8.0      dplyr_0.7.4     
## [13] gridExtra_2.3    ggplot2_3.0.0   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.18      pillar_1.2.2      compiler_3.4.4   
##  [4] plyr_1.8.4        bindr_0.1.1       viridis_0.5.1    
##  [7] iterators_1.0.9   tools_3.4.4       digest_0.6.15    
## [10] viridisLite_0.3.0 evaluate_0.10.1   gtable_0.2.0     
## [13] pkgconfig_2.0.1   rlang_0.2.0       cli_1.0.0        
## [16] ggrepel_0.8.0     yaml_2.1.18       withr_2.1.2      
## [19] knitr_1.20        hms_0.4.2         tidyselect_0.2.4 
## [22] rprojroot_1.3-2   glue_1.2.0        R6_2.2.2         
## [25] rmarkdown_1.9     farver_1.0        tweenr_1.0.0     
## [28] purrr_0.2.4       magrittr_1.5      units_0.6-1      
## [31] MASS_7.3-49       backports_1.1.2   scales_1.0.0     
## [34] codetools_0.2-15  htmltools_0.3.6   assertthat_0.2.0 
## [37] ggforce_0.1.3     colorspace_1.3-2  labeling_0.3     
## [40] utf8_1.1.3        stringi_1.2.2     lazyeval_0.2.1   
## [43] munsell_0.5.0     crayon_1.3.4